Let \(X_t\) be the true concentration of chlorophyll-a on day \(t\) and \(Y_t\) be the measured value on day \(t\).

The covariates used below include water-quality variables (dissolved oxygen, pH, turbidity) and meteorological drivers (air temperature, longwave and shortwave radiation, precipitation), loaded by the setup scripts:

Set up

library(rjags)
## Loading required package: coda
## Linked to JAGS 4.3.2
## Loaded modules: basemod,bugs
library(ecoforecastR)

# Download the raw data products; attaches the tidyverse (startup messages below)
# NOTE(review): presumably also builds the site-level daily summaries — confirm in script
source("01_download_data.R")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `summarise()` has grouped output by 'datetime', 'site_id'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'datetime', 'site_id'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'datetime', 'site_id'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'datetime', 'site_id'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'datetime', 'site_id'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
# Combine the downloaded products into the `cleaned_data` data frame used by
# every model below (NOTE(review): inferred from later chunks — confirm in script)
source("02_combine_data.R")

Null Model(s)

Random Walk Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t+1} \sim N(X_t, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \]

Fitting the Model

# Fit the random-walk state-space model in JAGS; expected to create
# `rwalk.jags.out` (mcmc.list) and `rwalk.out` (matrix of samples) used below
# NOTE(review): object names inferred from downstream chunks — confirm in script
source("fit_random_walk.R")
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 2007
##    Unobserved stochastic nodes: 3481
##    Total graph size: 5495
## 
## Initializing model

Diagnostics

# Discard burn-in
# keep only the monitored precision parameters (tau_add, tau_obs per the
# summary below) and drop the first 1000 iterations as burn-in
rwalk.params <- window(rwalk.jags.out[,1:2], start = 1000)

# Plot and summarize
# trace and density plots to visually check mixing/convergence of the chains
plot(rwalk.params)

summary(rwalk.params)
## 
## Iterations = 1000:3000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 2001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##           Mean     SD Naive SE Time-series SE
## tau_add  4.464 0.2132 0.002752        0.00984
## tau_obs 24.175 2.8603 0.036917        0.19475
## 
## 2. Quantiles for each variable:
## 
##           2.5%    25%    50%    75%  97.5%
## tau_add  4.065  4.317  4.459  4.602  4.899
## tau_obs 19.309 22.150 23.907 25.975 30.332
# posterior correlation between the process and observation precisions
# (moderate negative correlation below: the two error terms partially trade off)
cor(as.matrix(rwalk.params))
##            tau_add    tau_obs
## tau_add  1.0000000 -0.5239799
## tau_obs -0.5239799  1.0000000
# pairwise scatterplots of the posterior samples
pairs(as.matrix(rwalk.params))

Time Series Plot

## Credible interval on the latent chlorophyll-a states
## adjust time.rng to zoom in and out on specific time intervals
time.rng <- c(1, nrow(cleaned_data))

## Columns of the random-walk MCMC output that hold the latent states x[t]
x.cols <- grep("^x", colnames(rwalk.out))

## 2.5% / 50% / 97.5% posterior quantiles for every latent state
ci_rwalk <- apply(rwalk.out[, x.cols], 2, quantile, c(0.025, 0.5, 0.975))

## Empty plotting frame sized to the observations; envelope is layered on below
plot(cleaned_data$datetime, ci_rwalk[2, ],
     type = 'n',
     xlim = cleaned_data$datetime[time.rng],
     ylim = range(cleaned_data$chla, na.rm = TRUE),
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "Random Walk Model")

## Switch to monthly axis labels when zoomed in on a short window
if (diff(time.rng) < 100) {
  month.at <- seq(cleaned_data$datetime[time.rng[1]],
                  cleaned_data$datetime[time.rng[2]],
                  by = 'month')
  axis.Date(1, at = month.at, format = "%Y-%m")
}

## 95% credible envelope, then the raw observations on top
ecoforecastR::ciEnvelope(cleaned_data$datetime, ci_rwalk[1, ], ci_rwalk[3, ],
                         col = ecoforecastR::col.alpha("lightBlue", 0.75))
points(cleaned_data$datetime, cleaned_data$chla, pch = "+", cex = 0.5)

Previous Year’s Chlorophyll-a Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t} \sim N(X_{t-365}, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \]

Fitting the Model

# Fit the previous-year (lag-365) state-space model in JAGS; expected to create
# `pyear.jags.out` and `pyear.out` used below
# NOTE(review): object names inferred from downstream chunks — confirm in script
source("fit_previous_year_model.R")
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 2007
##    Unobserved stochastic nodes: 3481
##    Total graph size: 5495
## 
## Initializing model

Diagnostics

# Discard burn-in
# keep the two monitored precision parameters (tau_add, tau_obs) and drop the
# first 1000 iterations as burn-in
pyear.params <- window(pyear.jags.out[,1:2], start = 1000)

# Plot and summarize
# trace and density plots for a visual convergence check
plot(pyear.params)

summary(pyear.params)
## 
## Iterations = 1000:3000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 2001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##            Mean      SD  Naive SE Time-series SE
## tau_add 10.5070 2.10940 0.0272254       0.290969
## tau_obs  0.5375 0.02019 0.0002605       0.000799
## 
## 2. Quantiles for each variable:
## 
##          2.5%    25%    50%    75%  97.5%
## tau_add 7.407 8.9648 10.107 11.688 15.491
## tau_obs 0.499 0.5236  0.537  0.551  0.578
# posterior correlation between the process and observation precisions
# (weak negative correlation below)
cor(as.matrix(pyear.params))
##            tau_add    tau_obs
## tau_add  1.0000000 -0.2222468
## tau_obs -0.2222468  1.0000000
# pairwise scatterplots of the posterior samples
pairs(as.matrix(pyear.params))

Time Series Plot

## Credible interval on the latent states of the previous-year model
## adjust time.rng to zoom in and out on specific time intervals
time.rng <- c(1, nrow(cleaned_data))

## Columns of the previous-year model's MCMC output holding the states x[t]
x.cols <- grep("^x", colnames(pyear.out))

## 2.5% / 50% / 97.5% posterior quantiles for each latent state
ci_pyear <- apply(pyear.out[, x.cols], 2, quantile, c(0.025, 0.5, 0.975))

## Empty plotting frame sized to the observations; envelope is layered on below
plot(cleaned_data$datetime, ci_pyear[2, ],
     type = 'n',
     xlim = cleaned_data$datetime[time.rng],
     ylim = range(cleaned_data$chla, na.rm = TRUE),
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "Previous Year's Chlorophyll-A Model")

## Switch to monthly axis labels when zoomed in on a short window
if (diff(time.rng) < 100) {
  month.at <- seq(cleaned_data$datetime[time.rng[1]],
                  cleaned_data$datetime[time.rng[2]],
                  by = 'month')
  axis.Date(1, at = month.at, format = "%Y-%m")
}

## 95% credible envelope, then the raw observations on top
ecoforecastR::ciEnvelope(cleaned_data$datetime, ci_pyear[1, ], ci_pyear[3, ],
                         col = ecoforecastR::col.alpha("lightBlue", 0.75))
points(cleaned_data$datetime, cleaned_data$chla, pch = "+", cex = 0.5)

Internal Factors Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t+1} \sim N(X_t + \beta_0 + \beta_{DO} Z_{DO, t} + \beta_{pH}Z_{pH, t} + \beta_{\text{turb}}Z_{\text{turb}, t} + \beta_X X_t, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \] \[ \beta_j \sim N(0, 0.001) \text{ (precision parameterization)} \]

Fitting the Model

# Fit a dynamic linear model in which the process mean carries x[t-1] forward
# plus a density-dependence term (X) and oxygen / pH / turbidity covariates;
# fit_dlm builds the JAGS model (printed below) and runs the MCMC
internal_model <- ecoforecastR::fit_dlm(model = list(obs = "chla", fixed = "1 + X + oxygen + daily_pH + daily_turbidity"), cleaned_data)
## [1] "  \n  model{\n  \n  #### Priors\n  x[1] ~ dnorm(x_ic,tau_ic)\n  tau_obs ~ dgamma(a_obs,r_obs)\n  tau_add ~ dgamma(a_add,r_add)\n\n  #### Random Effects\n  #RANDOM  tau_alpha~dgamma(0.1,0.1)\n  #RANDOM  for(i in 1:nrep){                  \n  #RANDOM   alpha[i]~dnorm(0,tau_alpha)\n  #RANDOM  }\n\n  #### Fixed Effects\n         betaX ~dnorm(0,0.001)\n      betaIntercept~dnorm(0,0.001)\n     betaoxygen~dnorm(0,0.001)\n     betadaily_pH~dnorm(0,0.001)\n     betadaily_turbidity~dnorm(0,0.001)\n   for(j in 1: 4 ){\n    muXf[j] ~ dnorm(0,0.001)\n    tauXf[j] ~ dgamma(0.01,0.01)\n }\n\n  \n  #### Data Model\n  for(t in 1:n){\n    OBS[t] ~ dnorm(x[t],tau_obs)\n     Xf[t,1] ~ dnorm(muXf[1],tauXf[1])\nXf[t,2] ~ dnorm(muXf[2],tauXf[2])\nXf[t,3] ~ dnorm(muXf[3],tauXf[3])\nXf[t,4] ~ dnorm(muXf[4],tauXf[4])\n  }\n  \n  #### Process Model\n  for(t in 2:n){\n    mu[t] <- x[t-1]  + betaX*x[t-1] + betaIntercept*Xf[t,1] + betaoxygen*Xf[t,2] + betadaily_pH*Xf[t,3] + betadaily_turbidity*Xf[t,4]\n    x[t]~dnorm(mu[t],tau_add)\n  }\n\n  }"
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 10890
##    Unobserved stochastic nodes: 5583
##    Total graph size: 30120
## 
## Initializing model
#names(internal_model)   # uncomment to inspect the components returned by fit_dlm

Parameter Diagnostics

## parameter diagnostics
## drop the first 1000 iterations of the monitored parameters as burn-in
params_internal <- window(internal_model$params,start=1000) ## remove burn-in
## trace and density plots for a visual convergence check
plot(params_internal)

## posterior correlations among the regression and precision parameters
cor(as.matrix(params_internal))
##                     betaIntercept       betaX betadaily_pH betadaily_turbidity
## betaIntercept          1.00000000 -0.41050494 -0.827564415          0.32080057
## betaX                 -0.41050494  1.00000000  0.058997466         -0.17884622
## betadaily_pH          -0.82756441  0.05899747  1.000000000         -0.20862952
## betadaily_turbidity    0.32080057 -0.17884622 -0.208629524          1.00000000
## betaoxygen            -0.61911649  0.52745641  0.084039504         -0.28021532
## tau_add               -0.09522875  0.19035953  0.005195872         -0.02781123
## tau_obs                0.07906744 -0.22403051  0.032923568          0.03841447
##                     betaoxygen      tau_add     tau_obs
## betaIntercept       -0.6191165 -0.095228745  0.07906744
## betaX                0.5274564  0.190359532 -0.22403051
## betadaily_pH         0.0840395  0.005195872  0.03292357
## betadaily_turbidity -0.2802153 -0.027811230  0.03841447
## betaoxygen           1.0000000  0.145390472 -0.16490892
## tau_add              0.1453905  1.000000000 -0.54709835
## tau_obs             -0.1649089 -0.547098350  1.00000000
# pairwise scatterplots of the posterior samples
pairs(as.matrix(params_internal))

# numerical posterior summaries (means, SDs, quantiles)
summary(params_internal)
## 
## Iterations = 1000:5000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 4001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                          Mean        SD  Naive SE Time-series SE
## betaIntercept        0.531385  0.166901 1.523e-03      3.585e-02
## betaX               -0.067763  0.007549 6.890e-05      4.766e-04
## betadaily_pH         0.008952  0.023645 2.158e-04      4.004e-03
## betadaily_turbidity  0.002705  0.002547 2.324e-05      8.282e-05
## betaoxygen          -0.057975  0.011187 1.021e-04      1.179e-03
## tau_add              3.979082  0.172604 1.575e-03      8.931e-03
## tau_obs             61.170719 16.429575 1.500e-01      1.723e+00
## 
## 2. Quantiles for each variable:
## 
##                          2.5%        25%       50%       75%      97.5%
## betaIntercept        0.204696  0.4302380  0.521800  0.624170   0.905140
## betaX               -0.082581 -0.0729158 -0.067676 -0.062630  -0.053152
## betadaily_pH        -0.046160 -0.0031020  0.010942  0.024009   0.050577
## betadaily_turbidity -0.002261  0.0009598  0.002692  0.004456   0.007635
## betaoxygen          -0.078909 -0.0656741 -0.058313 -0.050523  -0.035062
## tau_add              3.663349  3.8589672  3.973880  4.089321   4.337036
## tau_obs             38.239120 49.7896851 58.429882 69.140470 101.640817

Time Series Plot

## adjust time.rng to zoom in and out on specific time intervals
time.rng <- c(1, nrow(cleaned_data))

## Posterior draws of the latent states and their credible-interval quantiles
out <- as.matrix(internal_model$predict)
ci <- apply(out, 2, quantile, c(0.025, 0.5, 0.975))

## Empty plotting frame sized to the observations
## (add log='y' to the call to plot on a log scale)
plot(cleaned_data$datetime, ci[2, ],
     type = 'n',
     xlim = cleaned_data$datetime[time.rng],
     ylim = range(cleaned_data$chla, na.rm = TRUE),
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "Internal Factors Model")

## Switch to monthly axis labels when zoomed in on a short window
if (diff(time.rng) < 100) {
  month.at <- seq(cleaned_data$datetime[time.rng[1]],
                  cleaned_data$datetime[time.rng[2]],
                  by = 'month')
  axis.Date(1, at = month.at, format = "%Y-%m")
}

## 95% credible envelope, then the raw observations on top
ecoforecastR::ciEnvelope(cleaned_data$datetime, ci[1, ], ci[3, ],
                         col = ecoforecastR::col.alpha("lightBlue", 0.75))
points(cleaned_data$datetime, cleaned_data$chla, pch = "+", cex = 0.5)

# NOTE(review): duplicate of the correlation table in the diagnostics section
# above; kept here for side-by-side reading with the time-series plot
cor(as.matrix(params_internal))
##                     betaIntercept       betaX betadaily_pH betadaily_turbidity
## betaIntercept          1.00000000 -0.41050494 -0.827564415          0.32080057
## betaX                 -0.41050494  1.00000000  0.058997466         -0.17884622
## betadaily_pH          -0.82756441  0.05899747  1.000000000         -0.20862952
## betadaily_turbidity    0.32080057 -0.17884622 -0.208629524          1.00000000
## betaoxygen            -0.61911649  0.52745641  0.084039504         -0.28021532
## tau_add               -0.09522875  0.19035953  0.005195872         -0.02781123
## tau_obs                0.07906744 -0.22403051  0.032923568          0.03841447
##                     betaoxygen      tau_add     tau_obs
## betaIntercept       -0.6191165 -0.095228745  0.07906744
## betaX                0.5274564  0.190359532 -0.22403051
## betadaily_pH         0.0840395  0.005195872  0.03292357
## betadaily_turbidity -0.2802153 -0.027811230  0.03841447
## betaoxygen           1.0000000  0.145390472 -0.16490892
## tau_add              0.1453905  1.000000000 -0.54709835
## tau_obs             -0.1649089 -0.547098350  1.00000000

External Factors Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t+1} \sim N(X_t + \beta_0 + \beta_{\text{temp}} Z_{\text{temp}, t} + \beta_{lr}Z_{lr, t} + \beta_{sr}Z_{sr, t} + \beta_{\text{prec}} Z_{\text{prec}, t}, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \] \[ \beta_j \sim N(0, 0.001) \text{ (precision parameterization)} \]

Fitting the Model

# Set up data object (with NA handling built-in)
# Missing chla values are left as NA so JAGS imputes them as latent states
data_ext <- list(
  y = cleaned_data$chla,
  n = length(cleaned_data$chla),
  temp = cleaned_data$air_temperature,
  longrad = cleaned_data$surface_downwelling_longwave_flux_in_air,
  shortrad = cleaned_data$surface_downwelling_shortwave_flux_in_air,
  precip = cleaned_data$precipitation_flux,
  x_ic = 1, tau_ic = 100,    # initial-condition prior (mean, precision)
  a_obs = 1, r_obs = 1,      # Gamma prior on observation precision
  a_add = 1, r_add = 1       # Gamma prior on process precision
)

# Fit the model — no betaX (density-dependence) term here, though fit_dlm
# still carries x[t-1] forward in the process mean (see the printed model below)
model_ext <- "~ 1 + temp + longrad + shortrad + precip"

ef.out.external <- ecoforecastR::fit_dlm(
  model = list(obs = "y", fixed = model_ext),
  data = data_ext
)
## [1] "  \n  model{\n  \n  #### Priors\n  x[1] ~ dnorm(x_ic,tau_ic)\n  tau_obs ~ dgamma(a_obs,r_obs)\n  tau_add ~ dgamma(a_add,r_add)\n\n  #### Random Effects\n  #RANDOM  tau_alpha~dgamma(0.1,0.1)\n  #RANDOM  for(i in 1:nrep){                  \n  #RANDOM   alpha[i]~dnorm(0,tau_alpha)\n  #RANDOM  }\n\n  #### Fixed Effects\n        betaIntercept~dnorm(0,0.001)\n     betatemp~dnorm(0,0.001)\n     betalongrad~dnorm(0,0.001)\n     betashortrad~dnorm(0,0.001)\n     betaprecip~dnorm(0,0.001)\n   for(j in 1: 5 ){\n    muXf[j] ~ dnorm(0,0.001)\n    tauXf[j] ~ dgamma(0.01,0.01)\n }\n\n  \n  #### Data Model\n  for(t in 1:n){\n    OBS[t] ~ dnorm(x[t],tau_obs)\n     Xf[t,1] ~ dnorm(muXf[1],tauXf[1])\nXf[t,2] ~ dnorm(muXf[2],tauXf[2])\nXf[t,3] ~ dnorm(muXf[3],tauXf[3])\nXf[t,4] ~ dnorm(muXf[4],tauXf[4])\nXf[t,5] ~ dnorm(muXf[5],tauXf[5])\n  }\n  \n  #### Process Model\n  for(t in 2:n){\n    mu[t] <- x[t-1]  + betaIntercept*Xf[t,1] + betatemp*Xf[t,2] + betalongrad*Xf[t,3] + betashortrad*Xf[t,4] + betaprecip*Xf[t,5]\n    x[t]~dnorm(mu[t],tau_add)\n  }\n\n  }"
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 11442
##    Unobserved stochastic nodes: 7776
##    Total graph size: 32543
## 
## Initializing model
# Optional: save results
# save(ef.out.external, file = "external_factors.RData")

Diagnostics

# Discard burn-in
# drop the first 1000 iterations from the monitored parameters
params_external <- window(ef.out.external$params, start = 1000)

# Plot and summarize
# trace and density plots for a visual convergence check
plot(params_external)

summary(params_external)
## 
## Iterations = 1000:5000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 4001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                     Mean        SD  Naive SE Time-series SE
## betaIntercept  1.922e-01 2.805e-01 2.561e-03      1.055e-01
## betalongrad    7.118e-05 4.494e-04 4.102e-06      9.456e-05
## betaprecip     5.324e-01 3.876e-01 3.538e-03      2.325e-02
## betashortrad   5.769e-05 2.165e-04 1.976e-06      1.400e-05
## betatemp      -8.318e-04 1.210e-03 1.105e-05      5.212e-04
## tau_add        4.005e+00 1.892e-01 1.727e-03      9.571e-03
## tau_obs        4.901e+01 1.165e+01 1.063e-01      1.058e+00
## 
## 2. Quantiles for each variable:
## 
##                     2.5%        25%        50%        75%     97.5%
## betaIntercept -0.3374339  1.631e-02  1.607e-01  3.310e-01 9.931e-01
## betalongrad   -0.0007444 -2.493e-04  2.947e-05  3.760e-04 1.066e-03
## betaprecip    -0.2176914  2.665e-01  5.315e-01  7.940e-01 1.294e+00
## betashortrad  -0.0003995 -8.244e-05  6.271e-05  2.030e-04 4.708e-04
## betatemp      -0.0041804 -1.437e-03 -7.351e-04 -7.037e-05 1.269e-03
## tau_add        3.6587205  3.873e+00  4.000e+00  4.128e+00 4.400e+00
## tau_obs       31.9071778  4.047e+01  4.689e+01  5.562e+01 7.717e+01
# posterior correlations; betaIntercept and betatemp are strongly negatively
# correlated (~ -0.92 below), suggesting the two are poorly identified jointly
cor(as.matrix(params_external))
##               betaIntercept betalongrad   betaprecip betashortrad    betatemp
## betaIntercept   1.000000000  0.23308207 -0.006021594    0.1661727 -0.91542160
## betalongrad     0.233082073  1.00000000 -0.598101796   -0.3159200 -0.58520421
## betaprecip     -0.006021594 -0.59810180  1.000000000    0.3105995  0.20896892
## betashortrad    0.166172677 -0.31591998  0.310599512    1.0000000 -0.12270241
## betatemp       -0.915421601 -0.58520421  0.208968924   -0.1227024  1.00000000
## tau_add         0.032357139  0.09484416 -0.036934048    0.0257353 -0.07183287
## tau_obs         0.010392044 -0.13216431  0.119083231    0.0333034  0.04519270
##                   tau_add     tau_obs
## betaIntercept  0.03235714  0.01039204
## betalongrad    0.09484416 -0.13216431
## betaprecip    -0.03693405  0.11908323
## betashortrad   0.02573530  0.03330340
## betatemp      -0.07183287  0.04519270
## tau_add        1.00000000 -0.62864827
## tau_obs       -0.62864827  1.00000000
# pairwise scatterplots of the posterior samples
pairs(as.matrix(params_external))

Time Series Plot

## Credible interval on the latent states of the external-factors model
## adjust time.rng to zoom in and out on specific time intervals
time.rng <- c(1, nrow(cleaned_data))

## Posterior draws of the latent states and their credible-interval quantiles
out_ext <- as.matrix(ef.out.external$predict)
ci_ext <- apply(out_ext, 2, quantile, c(0.025, 0.5, 0.975))

## Empty plotting frame sized to the observations
## (add log='y' to the call to plot on a log scale)
plot(cleaned_data$datetime, ci_ext[2, ],
     type = 'n',
     xlim = cleaned_data$datetime[time.rng],
     ylim = range(cleaned_data$chla, na.rm = TRUE),
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "External Factors Model")

## Switch to monthly axis labels when zoomed in on a short window
if (diff(time.rng) < 100) {
  month.at <- seq(cleaned_data$datetime[time.rng[1]],
                  cleaned_data$datetime[time.rng[2]],
                  by = 'month')
  axis.Date(1, at = month.at, format = "%Y-%m")
}

## 95% credible envelope, then the raw observations on top
ecoforecastR::ciEnvelope(cleaned_data$datetime, ci_ext[1, ], ci_ext[3, ],
                         col = ecoforecastR::col.alpha("lightBlue", 0.75))
points(cleaned_data$datetime, cleaned_data$chla, pch = "+", cex = 0.5)

Combined Factors Model

Data model: \[ Y_t \sim N(X_t, \tau_\text{obs}) \]

Process model: \[ X_{t+1} \sim N(X_t + \beta_0 + \beta_{\text{temp}} Z_{\text{temp}, t} + \beta_{\text{prec}} Z_{\text{prec}, t} + \beta_X X_t, \tau_\text{add}) \]

Priors: \[ X_1 \sim N(X_{ic}, \tau_{ic}) \] \[ \tau_\text{obs} \sim \text{Gamma}(a_\text{obs}, r_\text{obs}) \] \[ \tau_\text{add} \sim \text{Gamma}(a_\text{add}, r_\text{add}) \] \[ \beta_j \sim N(0, 0.001) \text{ (precision parameterization)} \]

Fitting the Model

# set up data object for ecoforecastR
# (missing chla values stay NA so JAGS imputes them as latent states)
data <- list(y = cleaned_data$chla,
             n = length(cleaned_data$chla),      ## data
             temp = cleaned_data$air_temperature,
             longrad = cleaned_data$surface_downwelling_longwave_flux_in_air,
             shortrad = cleaned_data$surface_downwelling_shortwave_flux_in_air,
             precip = cleaned_data$precipitation_flux,
             x_ic=1,tau_ic=100,      ## initial condition prior
             a_obs=1,r_obs=1,           ## obs error prior
             a_add=1,r_add=1            ## process error prior
)

## fit the model
## X adds a density-dependence term (betaX * x[t-1]) alongside temp and precip
model2 <- "~ 1 + X + temp + precip"
ef.out.combined <- ecoforecastR::fit_dlm(model=list(obs="y",fixed=model2),data)
## [1] "  \n  model{\n  \n  #### Priors\n  x[1] ~ dnorm(x_ic,tau_ic)\n  tau_obs ~ dgamma(a_obs,r_obs)\n  tau_add ~ dgamma(a_add,r_add)\n\n  #### Random Effects\n  #RANDOM  tau_alpha~dgamma(0.1,0.1)\n  #RANDOM  for(i in 1:nrep){                  \n  #RANDOM   alpha[i]~dnorm(0,tau_alpha)\n  #RANDOM  }\n\n  #### Fixed Effects\n         betaX ~dnorm(0,0.001)\n      betaIntercept~dnorm(0,0.001)\n     betatemp~dnorm(0,0.001)\n     betaprecip~dnorm(0,0.001)\n   for(j in 1: 3 ){\n    muXf[j] ~ dnorm(0,0.001)\n    tauXf[j] ~ dgamma(0.01,0.01)\n }\n\n  \n  #### Data Model\n  for(t in 1:n){\n    OBS[t] ~ dnorm(x[t],tau_obs)\n     Xf[t,1] ~ dnorm(muXf[1],tauXf[1])\nXf[t,2] ~ dnorm(muXf[2],tauXf[2])\nXf[t,3] ~ dnorm(muXf[3],tauXf[3])\n  }\n  \n  #### Process Model\n  for(t in 2:n){\n    mu[t] <- x[t-1]  + betaX*x[t-1] + betaIntercept*Xf[t,1] + betatemp*Xf[t,2] + betaprecip*Xf[t,3]\n    x[t]~dnorm(mu[t],tau_add)\n  }\n\n  }"
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 8096
##    Unobserved stochastic nodes: 5631
##    Total graph size: 24312
## 
## Initializing model
#save(ef.out.combined, file = "combined_factors.RData")

Parameter Diagnostics

#load("combined_factors.RData")   # uncomment to restore a previously saved fit

## parameter diagnostics
params <- window(ef.out.combined$params,start=1000) ## remove burn-in
## trace and density plots for a visual convergence check
plot(params)

summary(params)
## 
## Iterations = 1000:5000
## Thinning interval = 1 
## Number of chains = 3 
## Sample size per chain = 4001 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                    Mean        SD  Naive SE Time-series SE
## betaIntercept -0.368395  0.320723 2.927e-03      0.0741252
## betaX         -0.054721  0.006597 6.021e-05      0.0002811
## betaprecip     0.952007  0.310093 2.830e-03      0.0076760
## betatemp       0.001575  0.001103 1.007e-05      0.0002629
## tau_add        3.985928  0.170520 1.556e-03      0.0084174
## tau_obs       58.251369 13.506984 1.233e-01      1.2730172
## 
## 2. Quantiles for each variable:
## 
##                     2.5%        25%       50%       75%    97.5%
## betaIntercept -1.0030183 -0.5794392 -0.345296 -0.135986  0.22717
## betaX         -0.0676826 -0.0592146 -0.054644 -0.050212 -0.04193
## betaprecip     0.3421060  0.7408607  0.955181  1.163470  1.55357
## betatemp      -0.0004805  0.0007756  0.001508  0.002303  0.00375
## tau_add        3.6655604  3.8674048  3.980071  4.098066  4.33549
## tau_obs       35.8971446 48.9972451 56.607622 65.996640 90.42558
# posterior correlations; betaIntercept and betatemp are almost perfectly
# negatively correlated (-0.998 below), so the two are not jointly identifiable
cor(as.matrix(params))
##               betaIntercept      betaX  betaprecip   betatemp     tau_add
## betaIntercept     1.0000000  0.1452199  0.13525463 -0.9983711  0.05555080
## betaX             0.1452199  1.0000000 -0.17967765 -0.1845575  0.14306248
## betaprecip        0.1352546 -0.1796776  1.00000000 -0.1526453  0.07567401
## betatemp         -0.9983711 -0.1845575 -0.15264532  1.0000000 -0.06404640
## tau_add           0.0555508  0.1430625  0.07567401 -0.0640464  1.00000000
## tau_obs          -0.1103457 -0.1564249 -0.05925654  0.1193960 -0.53237256
##                   tau_obs
## betaIntercept -0.11034575
## betaX         -0.15642486
## betaprecip    -0.05925654
## betatemp       0.11939596
## tau_add       -0.53237256
## tau_obs        1.00000000
# pairwise scatterplots of the posterior samples
pairs(as.matrix(params))

Time-Series Plot

## Credible interval on the latent states of the combined model
## adjust time.rng to zoom in and out on specific time intervals
time.rng <- c(1, nrow(cleaned_data))

## Posterior draws of the latent states and their credible-interval quantiles
out <- as.matrix(ef.out.combined$predict)
ci <- apply(out, 2, quantile, c(0.025, 0.5, 0.975))

## Empty plotting frame sized to the observations
## (add log='y' to the call to plot on a log scale)
plot(cleaned_data$datetime, ci[2, ],
     type = 'n',
     xlim = cleaned_data$datetime[time.rng],
     ylim = range(cleaned_data$chla, na.rm = TRUE),
     xlab = "Time",
     ylab = "Chlorophyll-a",
     main = "Combined Factors Model")

## Switch to monthly axis labels when zoomed in on a short window
if (diff(time.rng) < 100) {
  month.at <- seq(cleaned_data$datetime[time.rng[1]],
                  cleaned_data$datetime[time.rng[2]],
                  by = 'month')
  axis.Date(1, at = month.at, format = "%Y-%m")
}

## 95% credible envelope, then the raw observations on top
ecoforecastR::ciEnvelope(cleaned_data$datetime, ci[1, ], ci[3, ],
                         col = ecoforecastR::col.alpha("lightBlue", 0.75))
points(cleaned_data$datetime, cleaned_data$chla, pch = "+", cex = 0.5)